#!/usr/bin/python
#coding:utf-8

import urllib2
from bs4 import BeautifulSoup
# Set the process-wide default encoding to GBK (Python 2 only).
# Presumably this is here so that printing the unicode headlines below does
# not fail when stdout has no encoding of its own -- TODO confirm.
import sys
reload(sys)  # reload re-exposes sys.setdefaultencoding, which is removed at startup
sys.setdefaultencoding('gbk')

# Address of the page to scrape.
url = 'http://news.baidu.com/'

# Download the raw HTML. A timeout keeps the script from hanging forever on
# a stalled connection, and the response is always closed afterwards.
response = urllib2.urlopen(url, timeout=30)
try:
    content = response.read()
finally:
    response.close()

# Parse the HTML with BeautifulSoup, telling it the bytes are GBK-encoded.
soup = BeautifulSoup(content, from_encoding='gbk')

# Locate the hot-news container. The attribute filter must be a dict
# ({'class': 'hotnews'}); the previous set literal {'class', 'hotnews'} was
# interpreted as "class is 'class' OR 'hotnews'" and only worked by accident.
hotBlocks = soup.find_all('div', {'class': 'hotnews'})
if not hotBlocks:
    # Fail with a readable message (and a non-zero exit status) instead of
    # an IndexError when the page layout has changed.
    sys.exit('no <div class="hotnews"> found at %s' % url)
hotNews = hotBlocks[0].find_all('li')

for i in hotNews:
    link = i.a
    if link is None:
        # Skip list items that carry no anchor at all.
        continue
    print(link.text)  # news headline
    print(link.get('href', ''))  # news link (empty if the anchor has no href)